# Script to replicate analyses presented in:
# Kathryn Sikkink, Helen Clapp, Daniel Marin-Lopez, and Averell Schmidt
# "Gender and Transitional Justice: Explaining Global Trends"
# International Journal of Transitional Justice 

# Last Updated: June 4, 2024

# 1. Set Up ----

# Install required package
# tidyverse (Version 1.3.1)
# The archived source release is installed when tidyverse is missing or when a
# different version is present. `||` short-circuits, so packageVersion() is
# only called once the package is known to be installed (with the elementwise
# `|` it was always evaluated and raised an error on machines without
# tidyverse). The lookup is restricted to the package names, i.e. the row
# names of the installed.packages() matrix, instead of every cell of it.
if (!("tidyverse" %in% rownames(installed.packages())) ||
    packageVersion("tidyverse") != "1.3.1") {
  package_url <- "https://cran.r-project.org/src/contrib/Archive/tidyverse/tidyverse_1.3.1.tar.gz"
  install.packages(package_url, repos = NULL, type = "source")
}
library("tidyverse") # Load package (freshly installed or already present)

# 2. Country-Context Table ----

# Build the appendix list of transitional contexts: one label per country
# case of the form "<country_case> (<first year> - <last year>)".
tjet <- read.csv("data/tjet_cy.csv") # Import country-year data
tjet <- subset(tjet, dtr == 1 | aco == 1) # Subset by transitional context keys
tjet_sum <- tjet |> # Summarize data by country case
  group_by(country_case) |>
  summarise(
    year_min = min(year),
    year_max = max(year),
    n = n(), # number of country-year rows in the case
    year_n = year_max - year_min + 1, # length of the year span
    check = n == year_n # TRUE when the row count equals the span length
  )
tjet_sum$year_max[tjet_sum$year_max == 2023] <- 2020 # Fix end year
tjet_sum$country_case <- paste0( # Create label
  tjet_sum$country_case, " (",
  str_c(tjet_sum$year_min, tjet_sum$year_max, sep = " - "),
  ")"
)
# Export as CSV (FALSE spelled out: `F` is an ordinary, reassignable variable)
write.csv(tjet_sum$country_case, "transitional_contexts_table.csv", row.names = FALSE)
# CSV file is then edited for presentation in the appendix


# 3. By Country-Context (Analyses in Article) -----

# Define Country Context Sample key based on aco and dtr
tjet_cy <- read.csv("data/tjet_cy.csv") # Load data
tjet_cy <- tjet_cy |> mutate(cy_key = ccode_cow * 10000 + year) # Create country-year key
# Keys of all country-years with dtr == 1 or aco == 1. which() skips rows where
# that test is NA, and NA keys are removed afterwards: plain logical indexing
# would leave NA entries in the vector, and since `NA %in% c(..., NA)` is TRUE
# any record with a missing country code or year would silently be matched
# into the sample by the `%in% dtr_aco_key` filters below.
dtr_aco_key <- tjet_cy$cy_key[which(tjet_cy$dtr == 1 | tjet_cy$aco == 1)]
dtr_aco_key <- dtr_aco_key[!is.na(dtr_aco_key)]

# Trials

# Load the trials data, report totals for all pre-2021 trials, then keep only
# trials whose accused-country/start-year key is in the sample key vector.
trials <- read.csv("data/tjet_trials.csv") %>% # Full trials data set
  filter(yearStart < 2021) # Pre-2021 years have complete data
nrow(trials) # Total number of trials
sum(trials$anyStateAgent) # Total number of trials of state agents
summary(trials$yearStart) # minimum/maximum year
trials <- trials %>%
  mutate(cy_key = ccode_Accused * 10000 + yearStart) # Same key layout as tjet_cy
trials <- trials[trials$cy_key %in% dtr_aco_key, ] # Subset to sample
nrow(trials) # Total number of TJ trials

# Lookup table with one row per COW country code (region and country name).
regions <- read.csv("data/tjet_cy.csv") %>% # Import country IDs
  select(ccode_cow, region_wb, country) %>%
  distinct() %>%
  # Address Sudan code: drop the code-625 row filed under Middle East & North Africa
  filter(ccode_cow != 625 | region_wb != "Middle East & North Africa")
# Keep the first row per code (drops ccode_cow with different country names)
regions <- regions[!duplicated(regions$ccode_cow), ]

# Attach region and country name to each trial and collapse trialType into
# two categories. The repeated (unreachable) "foreign" branch was removed;
# any trialType value not listed below still becomes NA.
trials <- trials |> # Merge country IDs with trials data
  left_join(regions, by = c("ccode_Accused" = "ccode_cow")) |> # Merge by COW Country ID
  select(trialID, region_wb, trialType, yearStart, country) |> # Select relevant variables
  mutate( # Create variable for trial type
    trialType = case_when(
      trialType %in% c("international", "international (hybrid)", "foreign") ~ "other",
      trialType %in% c("don't know", "domestic") ~ "domestic" # Only 4 are "don't know"
    )
  )

# Accused-level data with two derived 0/1 indicators:
#   GAT          - any of the SGBV charge or victim-type variables is positive
#   femaleVictim - GAT, and none of the male/child/LGBTQ victim flags is set
accused <- read.csv("data/tjet_accused.csv") |> # Import accusation data
  select(
    trialID, accusedID, SGBV, rape, sexualViolence, otherSGBV,
    maleVictim, childVictim, LGBTQvictim
  ) |> # Select relevant variables
  mutate(
    GAT = as.numeric(
      SGBV + rape + sexualViolence + otherSGBV + maleVictim + childVictim + LGBTQvictim > 0
    ),
    femaleVictim = as.numeric(
      GAT == 1 & maleVictim == 0 & childVictim == 0 & LGBTQvictim == 0
    )
  )

# Merge accusation data with trials data and collapse to one row per trial.
# Every accused-level indicator is replaced by its within-trial maximum, so a
# trial is flagged when at least one of its accused is flagged; distinct()
# then removes the now-identical rows.
trials <- trials %>%
  inner_join(accused, by = "trialID") %>% # Merge by trialID
  group_by(trialID) %>% # Summarize accusations by trial
  mutate(
    # Count gender-attentive accused BEFORE GAT is overwritten below. mutate()
    # evaluates its arguments in order, so computing sum(GAT) after
    # GAT = max(GAT) summed the recycled trial-level value and returned
    # n_accused * GAT instead of the number of gender-attentive accused.
    n_GAT = sum(GAT, na.rm = TRUE),
    n_accused = n(),
    SGBV = max(SGBV, na.rm = TRUE), # Make variable for charge type
    rape = max(rape, na.rm = TRUE),
    sexualViolence = max(sexualViolence, na.rm = TRUE),
    otherSGBV = max(otherSGBV, na.rm = TRUE),
    maleVictim = max(maleVictim, na.rm = TRUE),
    childVictim = max(childVictim, na.rm = TRUE),
    LGBTQvictim = max(LGBTQvictim, na.rm = TRUE),
    GAT = max(GAT, na.rm = TRUE),
    femaleVictim = max(femaleVictim, na.rm = TRUE)
  ) %>%
  select(-accusedID) %>%
  ungroup() %>%
  distinct()

sum(trials$GAT) # Total gender-attentive trials
mean(trials$GAT) # Share of gender-attentive trials

# Truth Commissions 

# Truth commissions (TCs) passed before 2021 in a sampled country-year, scored
# on three gender-attentive components.
tcs <- read.csv("data/tjet_tcs.csv") %>% # Import truth commissions data
  filter(yearPassed < 2021) %>% # Pre-2021 years have complete data
  mutate(cy_key = ccode_cow * 10000 + yearPassed) # Define key in tcs
tcs <- tcs[tcs$cy_key %in% dtr_aco_key, ] # Select observations
tcs <- tcs %>%
  mutate(
    # 1 when the consulted-groups entry is one of the listed combinations that
    # include women / feminist groups, 0 otherwise
    genderConsulted = ifelse(
      consultedGroups %in% c(
        "ethnic minorities; indigenous groups; women / feminist groups",
        "children's representatives; women / feminist groups",
        "ethnic minorities; women / feminist groups"
      ),
      1, 0
    ),
    # Sum of the three components:
    #   SGBV         - binary for whether TC addressed SGBV
    #   genderReform - binary for whether TC proposed gender attentive reforms
    GAT = genderConsulted + SGBV + genderReform
  )
nrow(tcs) # Total number of truth commissions in transitional contexts
sum(tcs$GAT > 0) # Total number of gender-attentive TCs

# Reparations 

# Reparations policies created before 2021 in a sampled country-year, with a
# count of gender-attentive components per policy.
reparations <- read.csv("data/tjet_reparations.csv") %>% # Load reparations data
  filter(yearCreated < 2021) %>% # Pre-2021 years have complete data
  mutate(cy_key = ccode_cow * 10000 + yearCreated) # Define key
reparations <- reparations[reparations$cy_key %in% dtr_aco_key, ] # Subset based on key
nrow(reparations) # Total number of TJ reparations policies
reparations <- reparations %>%
  mutate(
    # Sum of gender-attentive components
    genderAttentive_sum = harmsSexualViolence +
      ifelse(genderCrimes == "yes", 1, 0) +
      ifelse(lgbtqCrimes == "yes", 1, 0)
  )
nrow(reparations) # Total number of reparations policies
sum(reparations$genderAttentive_sum > 0) # Total number of gender-attentive reparations policies

# 3a. Emergence ----

# Trials

# Yearly summaries by trial type: trials started (n), accused persons,
# gender-attentive trials (n_GAT) and the gender-attentive share (GAT).
trials_dom <- trials %>% # Domestic trials
  filter(trialType == "domestic") %>%
  group_by(yearStart) %>%
  summarise(
    trialType = "Domestic Trials",
    n = n(),
    n_accused = sum(n_accused),
    n_GAT = sum(GAT), # has to come before GAT is replaced by its mean
    GAT = mean(GAT)
  )
trials_intl <- trials %>% # International trials (everything recoded as "other")
  filter(trialType == "other") %>%
  group_by(yearStart) %>%
  summarise(
    trialType = "International Trials",
    n = n(),
    n_accused = sum(n_accused),
    n_GAT = sum(GAT),
    GAT = mean(GAT)
  )
trials_graph_data <- rbind(trials_dom, trials_intl) # Combine data

# Line chart of the number of trials started per year, one line per type
ggplot(
  trials_graph_data,
  aes(x = yearStart, y = n, linetype = trialType, colour = trialType)
) +
  geom_line(lwd = 0.75) +
  scale_linetype_manual(values = c(1, 2)) +
  scale_color_manual(values = c("#CC6666", "#9999CC")) +
  labs(x = "Year", y = "Number of Trials") +
  theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    strip.text = element_text(size = 16),
    plot.title = element_text(hjust = 0.5, size = 14),
    text = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 12)
  )
ggsave("trials_over_time.jpeg", plot = last_plot(), width = 5, height = 4, units = "in") # Save

# Line chart of the number of gender-attentive trials started per year, one
# line per trial type. The plot previously added geom_line() twice (and
# theme_bw() three times), drawing every line twice on top of itself; a single
# line layer and a single theme call are kept.
trials_graph_data |> # Plot GAT data
  ggplot(aes(x = yearStart, y = n_GAT, lty = trialType, color = trialType)) +
  geom_line(lwd = 0.5) +
  scale_linetype_manual(values = c(1, 2)) +
  scale_color_manual(values = c("#CC6666", "#9999CC")) +
  ylab("Number of Gender-Attentive Trials") + xlab("Year") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 12),
        strip.text = element_text(size = 16),
        plot.title = element_text(hjust = 0.5, size = 14),
        text = element_text(size = 12),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12))
ggsave("gat_trials_over_time.jpeg", plot = last_plot(), width = 5, height = 4, units = "in") # Save

# GAT emergence by court type: list the 20 earliest gender-attentive trials of
# each type, and count pre-1994 domestic ones overall and by region.
gat_intl_trials <- trials %>% # First GAT int'l trials
  filter(GAT > 0 & trialType == "other")
gat_intl_trials %>% arrange(yearStart) %>% head(n = 20)
gat_dom_trials <- trials %>% # First GAT domestic trials
  filter(GAT > 0 & trialType == "domestic")
with(gat_dom_trials, length(region_wb[yearStart < 1994]))
with(gat_dom_trials, table(region_wb[yearStart < 1994]))
gat_dom_trials %>% arrange(yearStart) %>% head(n = 20)

# Truth Commissions 

# Early TC cases: print the earliest truth commissions without and with any
# gender-attentive component.
tcs <- tcs %>% left_join(regions, by = "ccode_cow") # Merge in country name to tcs dataset
vars_to_print <- c("ccode_cow", "country", "yearPassed", "GAT") # Select variables to print
nongat_tcs <- tcs[which(tcs$GAT == 0), vars_to_print] # Non-GAT TCs
head(nongat_tcs[order(nongat_tcs$yearPassed), ], n = 20) # Print early non-GAT TCs
gat_tcs <- tcs[which(tcs$GAT > 0), vars_to_print] # GAT TCs
head(gat_tcs[order(gat_tcs$yearPassed), ], n = 15) # Print early GAT TCs

# Graph TCs over time: stacked area chart of the number of active truth
# commissions per year (1970-2020), split by gender-attentiveness.
vars_to_print <- c("ccode_cow", "country", "GAT", "yearBeginOperation", "yearPassed", "yearCompleteOperation")
tcs_graph_data <- tcs[, vars_to_print] # Subset data
# if yearCompleteOperation == NA, TC is ongoing
tcs_graph_data$yearCompleteOperation[is.na(tcs_graph_data$yearCompleteOperation)] <- 2020

# Expand every TC into the years from yearPassed to yearCompleteOperation and
# pool them per group. Map() over the selected rows replaces the former
# `for (i in 1:sum(...))` loops, which iterated over c(1, 0) and failed on
# NA:NA when a group had no rows, and which grew the result with c() on every
# iteration.
gat_tcs_years_active <- as.numeric(unlist(Map( # Years each GAT TC is active
  `:`,
  tcs_graph_data$yearPassed[which(tcs_graph_data$GAT > 0)],
  tcs_graph_data$yearCompleteOperation[which(tcs_graph_data$GAT > 0)]
)))
nongat_tcs_years_active <- as.numeric(unlist(Map( # Years each non-GAT TC is active
  `:`,
  tcs_graph_data$yearPassed[which(tcs_graph_data$GAT == 0)],
  tcs_graph_data$yearCompleteOperation[which(tcs_graph_data$GAT == 0)]
)))

tcs_graph_data <- tibble( # Create base df for all years and treatment groups
  year = rep(1970:2020, 2),
  GAT = rep(c(0, 1), each = length(1970:2020))
)
tcs_graph_data <- tcs_graph_data |> mutate(id = year * 10 + GAT) # Make ID (year and group in one key)
nongat_tcs_years_active <- as.data.frame(table(nongat_tcs_years_active)) # Tabulate and coerce to dataframe
gat_tcs_years_active <- as.data.frame(table(gat_tcs_years_active))
colnames(gat_tcs_years_active) <- colnames(nongat_tcs_years_active) <- c("year", "n_active") # Rename variables
gat_tcs_years_active$GAT <- rep(1, nrow(gat_tcs_years_active)) # Make GAT indicator variable
nongat_tcs_years_active$GAT <- rep(0, nrow(nongat_tcs_years_active))
tcs_years_active <- rbind(gat_tcs_years_active, nongat_tcs_years_active) # Combine
tcs_years_active$year <- as.numeric(as.character(tcs_years_active$year)) # Coerce factor to numeric
tcs_years_active <- tcs_years_active |> mutate(id = year * 10 + GAT) # Make ID
# Merge counts onto the full year-by-group grid; years outside 1970-2020 drop out
tcs_graph_data <- tcs_graph_data |>
  left_join(tcs_years_active[, c("id", "n_active")], by = "id")
tcs_graph_data$n_active[is.na(tcs_graph_data$n_active)] <- 0 # Set NA to 0
tcs_graph_data$label <- "Gender-Attentive" # Create labels
tcs_graph_data$label[tcs_graph_data$GAT == 0] <- "Not Gender-Attentive"
ggplot(tcs_graph_data, aes(x = year, y = n_active, fill = label)) + # Plot
  scale_fill_manual(values = c("darkgrey", "lightgrey")) +
  geom_area(position = "stack") +
  ylab("Number of Active Truth Commissions") + xlab("Year") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 12),
        strip.text = element_text(size = 16),
        plot.title = element_text(hjust = 0.5, size = 14),
        text = element_text(size = 12),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12))
ggsave("tcs_over_time.jpeg", plot = last_plot(), width = 7, height = 4, units = "in")

# Since 2000 majority of TCs are gender-attentive
# (share of active TC-years after 2000 that belong to gender-attentive TCs)
sum(tcs_graph_data$n_active[tcs_graph_data$year > 2000 & tcs_graph_data$GAT == 1]) /
  sum(tcs_graph_data$n_active[tcs_graph_data$year > 2000])

# Reparations

# Early Reparations cases: print the earliest reparations policies without and
# with any gender-attentive component.
reparations <- reparations %>%
  left_join(regions, by = "ccode_cow") # Merge in country name to reparations dataset
vars_to_print <- c("ccode_cow", "country", "yearCreated", "genderAttentive_sum") # Select variables to print
nongat_reparations <- reparations[which(reparations$genderAttentive_sum == 0), vars_to_print]
head(nongat_reparations[order(nongat_reparations$yearCreated), ], n = 20) # Print early non-GAT policies
gat_reparations <- reparations[which(reparations$genderAttentive_sum > 0), vars_to_print]
head(gat_reparations[order(gat_reparations$yearCreated), ], n = 15) # Print early GAT policies

# Bar chart of reparations: stacked bars of new reparations policies per year
# (1970-2020), split by gender-attentiveness.
# Yearly counts of GAT policies, merged onto the full range of years
gat_dat <- setNames(
  as.data.frame(table(gat_reparations$yearCreated)),
  c("year", "val")
)
gat_dat$year <- as.numeric(as.character(gat_dat$year)) # Factor levels back to numeric years
dat1 <- tibble(year = 1970:2020, gat = "Gender-Attentive") %>%
  left_join(gat_dat, by = "year")
# Same for non-GAT policies
nongat_dat <- setNames(
  as.data.frame(table(nongat_reparations$yearCreated)),
  c("year", "val")
)
nongat_dat$year <- as.numeric(as.character(nongat_dat$year))
dat2 <- tibble(year = 1970:2020, gat = "Not Gender-Attentive") %>%
  left_join(nongat_dat, by = "year")
rep_graph_dat <- rbind(dat1, dat2) # Combine
rep_graph_dat$val[is.na(rep_graph_dat$val)] <- 0 # Years without a policy count as 0
ggplot(rep_graph_dat, aes(x = year, y = val, fill = gat)) + # Plot
  geom_col(position = "stack") +
  scale_fill_manual(values = c("darkgrey", "lightgrey")) +
  labs(x = "Year", y = "Number of New Reparations Policies") +
  theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    strip.text = element_text(size = 16),
    plot.title = element_text(hjust = 0.5, size = 14),
    text = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 12)
  )
ggsave("rep_over_time.jpeg", plot = last_plot(), width = 7, height = 4, units = "in")

# Pre-post 1998 (e.g., year South Africa adopts GAT reparations)
# Counts of GAT / non-GAT policies created up to and after the cut year.
cut_year <- 1998
with(reparations, sum(genderAttentive_sum[yearCreated <= cut_year] > 0)) # GAT, up to cut year
with(reparations, sum(genderAttentive_sum[yearCreated <= cut_year] == 0)) # non-GAT, up to cut year
with(reparations, sum(genderAttentive_sum[yearCreated > cut_year] > 0)) # GAT, after cut year
with(reparations, sum(genderAttentive_sum[yearCreated > cut_year] == 0)) # non-GAT, after cut year


# 3b. Content ----

# Trials 

# Variation of GAT trials by charges
with(trials, sum(GAT == 1))
with(trials, sum(GAT == 1 & SGBV == 1))
with(trials, sum(GAT == 1 & SGBV == 1 & rape == 1))
with(trials, sum(GAT == 1 & SGBV == 1 & rape == 0))

# Year of emergence
with(trials, summary(yearStart[GAT == 1 & SGBV == 1 & rape == 1]))
with(trials, summary(yearStart[GAT == 1 & SGBV == 1 & rape == 0]))

# Variation of GAT trials by victim
with(trials, sum(GAT == 1 & femaleVictim == 1))
with(trials, sum(GAT == 1 & childVictim == 1))
with(trials, sum(GAT == 1 & maleVictim == 1))
with(trials, sum(GAT == 1 & LGBTQvictim == 1))

# All cases with female victim are SGBV (prints TRUE when the two counts agree)
with(trials, sum(GAT == 1 & SGBV == 1 & femaleVictim == 1) == sum(GAT == 1 & femaleVictim == 1))

# Year of emergence (note median for femaleVictim is less than others)
with(trials, summary(yearStart[GAT == 1 & femaleVictim == 1]))
with(trials, summary(yearStart[GAT == 1 & childVictim == 1]))
with(trials, summary(yearStart[GAT == 1 & maleVictim == 1]))
with(trials, summary(yearStart[GAT == 1 & LGBTQvictim == 1]))

# Total GAT trials by victim and charges
trials %>%
  filter(GAT > 0) %>%
  select(SGBV, rape, sexualViolence,
         femaleVictim, maleVictim, childVictim, LGBTQvictim,
         GAT) %>%
  colSums()

# Percent GAT trials by victim and charges: all, domestic, international
trials %>%
  filter(GAT > 0) %>%
  select(SGBV, rape, sexualViolence,
         femaleVictim, maleVictim, childVictim, LGBTQvictim,
         GAT) %>%
  colMeans()
gat_dom_trials %>%
  select(SGBV, rape, sexualViolence,
         femaleVictim, maleVictim, childVictim, LGBTQvictim) %>%
  colMeans()
gat_intl_trials %>%
  select(SGBV, rape, sexualViolence,
         femaleVictim, maleVictim, childVictim, LGBTQvictim) %>%
  colMeans()

# Truth Commissions

# Variation in TCs by nature of GAT: total, then by component
with(tcs, sum(GAT > 0))
with(tcs, sum(GAT > 0 & SGBV > 0))
with(tcs, sum(GAT > 0 & genderConsulted > 0))
with(tcs, sum(GAT > 0 & genderReform > 0))

# TCs by gender reform type (all TCs with genderReform == 1, oldest first)
tcs_gender_reform <- tcs[
  which(tcs$genderReform == 1),
  c("country", "yearPassed", "genderReform", "genderReformType")
]
tcs_gender_reform[order(tcs_gender_reform$yearPassed), ]

# TCs by gender consulted group type
with(tcs, country[GAT > 0 & genderConsulted > 0]) # women / feminist groups consulted
with(tcs, country[GAT > 0 & consultedGroups != ""]) # any consulted group recorded


# Reparations

# Prevalence of attention to sexual violence/harms in GAT reparations policies
sum(reparations$harmsSexualViolence[reparations$genderAttentive_sum > 0])
sum(reparations$genderAttentive_sum > 0)
mean(reparations$harmsSexualViolence[reparations$genderAttentive_sum > 0])
table(reparations$sexualViolenceType[reparations$genderAttentive_sum > 0])

# Summary of other gender attentive reparations variables
vars_list <- c("region_wb",
               "country",
               "yearCreated",
               "harmsSexualViolence",
               "genderAttentive",
               "genderCrimes",
               "genderWomen",
               "genderMen",
               "sexualViolenceType",
               "lgbtqCrimes",
               "individualReparations",
               "genderAttentive_sum")
# Show all GAT policies. View() opens a GUI data viewer, which may not be
# available when the script is run non-interactively (e.g. with Rscript), so
# the table is printed instead in that case.
if (interactive()) {
  View(subset(reparations[vars_list], genderAttentive_sum > 0))
} else {
  print(subset(reparations[vars_list], genderAttentive_sum > 0))
}
sum(reparations$genderAttentive_sum > 0) # Total number of GAT reparations policies
sum(reparations$harmsSexualViolence[reparations$genderAttentive_sum > 0]) # Total number of GAT reparations policies focused on sexual violence
table(reparations$sexualViolenceType)
table(reparations$sexualViolenceType[reparations$genderAttentive_sum > 0])

# Variation in gender attentive reparations policies by group

# Women
sum(reparations$genderCrimes[reparations$genderAttentive_sum > 0] == "yes") # Total gender crimes
sum(reparations$genderWomen[reparations$genderAttentive_sum > 0] == "yes") # Total gender women crimes

# Men
reparations$country[which(reparations$genderMen == "yes" & reparations$genderAttentive_sum > 0)]
reparations$yearCreated[which(reparations$genderMen == "yes" & reparations$genderAttentive_sum > 0)]

# Child recruitment
sum(reparations$harmsChildRecruitment[reparations$genderAttentive_sum > 0])
reparations$country[which(reparations$harmsChildRecruitment == 1 & reparations$genderAttentive_sum > 0)]
reparations$yearCreated[which(reparations$harmsChildRecruitment == 1 & reparations$genderAttentive_sum > 0)]

# LGBTQI+
reparations$country[which(reparations$lgbtqCrimes == "yes" & reparations$genderAttentive_sum > 0)]
reparations$yearCreated[which(reparations$lgbtqCrimes == "yes" & reparations$genderAttentive_sum > 0)]

# Children
# NOTE(review): these two lines repeat the lgbtqCrimes filter from the LGBTQI+
# section above, so they print the same countries and years again. This looks
# like a copy-paste slip; the intended child-related column is not visible in
# this script - TODO confirm against the reparations codebook and replace.
reparations$country[which(reparations$lgbtqCrimes == "yes" & reparations$genderAttentive_sum > 0)]
reparations$yearCreated[which(reparations$lgbtqCrimes == "yes" & reparations$genderAttentive_sum > 0)]


# 4. By Policy-Context (Analyses in Appendix) ----

# Trials 

# Load the trials data, report totals for all pre-2021 trials, then keep the
# trials coded as transitional justice by the policy-context indicators.
trials <- read.csv("data/tjet_trials.csv") %>% # Full trials data set
  filter(yearStart < 2021) # Pre-2021 years have complete data
nrow(trials) # Total number of trials
sum(trials$anyStateAgent) # Total number of trials of state agents
summary(trials$yearStart) # minimum/maximum year
trials <- filter( # Select transitional justice trials by policy-context
  trials,
  fitsConflictTJ == 1 | # Indicator for post-conflict TJ
    fitsPostAutocraticTJ == 1 # Indicator for post-autocracy TJ
)
nrow(trials) # Total number of TJ trials

# Lookup table with one row per COW country code (region and country name).
regions <- read.csv("data/tjet_cy.csv") %>% # Import country IDs
  select(ccode_cow, region_wb, country) %>%
  distinct() %>%
  # Address Sudan code: drop the code-625 row filed under Middle East & North Africa
  filter(ccode_cow != 625 | region_wb != "Middle East & North Africa")
# Keep the first row per code (drops ccode_cow with different country names)
regions <- regions[!duplicated(regions$ccode_cow), ]

# Attach region and country name to each trial and collapse trialType into
# two categories. The repeated (unreachable) "foreign" branch was removed;
# any trialType value not listed below still becomes NA.
trials <- trials |> # Merge country IDs with trials data
  left_join(regions, by = c("ccode_Accused" = "ccode_cow")) |> # Merge by COW Country ID
  select(trialID, region_wb, trialType, yearStart, country) |> # Select relevant variables
  mutate( # Create variable for trial type
    trialType = case_when(
      trialType %in% c("international", "international (hybrid)", "foreign") ~ "other",
      trialType %in% c("don't know", "domestic") ~ "domestic" # Only 4 are "don't know"
    )
  )

# Accused-level data with two derived 0/1 indicators:
#   GAT          - any of the SGBV charge or victim-type variables is positive
#   femaleVictim - GAT, and none of the male/child/LGBTQ victim flags is set
accused <- read.csv("data/tjet_accused.csv") |> # Import accusation data
  select(
    trialID, accusedID, SGBV, rape, sexualViolence, otherSGBV,
    maleVictim, childVictim, LGBTQvictim
  ) |> # Select relevant variables
  mutate(
    GAT = as.numeric(
      SGBV + rape + sexualViolence + otherSGBV + maleVictim + childVictim + LGBTQvictim > 0
    ),
    femaleVictim = as.numeric(
      GAT == 1 & maleVictim == 0 & childVictim == 0 & LGBTQvictim == 0
    )
  )

# Merge accusation data with trials data and collapse to one row per trial.
# Every accused-level indicator is replaced by its within-trial maximum, so a
# trial is flagged when at least one of its accused is flagged; distinct()
# then removes the now-identical rows.
trials <- trials %>%
  inner_join(accused, by = "trialID") %>% # Merge by trialID
  group_by(trialID) %>% # Summarize accusations by trial
  mutate(
    # Count gender-attentive accused BEFORE GAT is overwritten below. mutate()
    # evaluates its arguments in order, so computing sum(GAT) after
    # GAT = max(GAT) summed the recycled trial-level value and returned
    # n_accused * GAT instead of the number of gender-attentive accused.
    n_GAT = sum(GAT, na.rm = TRUE),
    n_accused = n(),
    SGBV = max(SGBV, na.rm = TRUE), # Make variable for charge type
    rape = max(rape, na.rm = TRUE),
    sexualViolence = max(sexualViolence, na.rm = TRUE),
    otherSGBV = max(otherSGBV, na.rm = TRUE),
    maleVictim = max(maleVictim, na.rm = TRUE),
    childVictim = max(childVictim, na.rm = TRUE),
    LGBTQvictim = max(LGBTQvictim, na.rm = TRUE),
    GAT = max(GAT, na.rm = TRUE),
    femaleVictim = max(femaleVictim, na.rm = TRUE)
  ) %>%
  select(-accusedID) %>%
  ungroup() %>%
  distinct()

sum(trials$GAT) # Total gender-attentive trials
mean(trials$GAT) # Share of gender-attentive trials

# Truth Commissions 

# Truth commissions (TCs) passed before 2021 that are coded as transitional
# justice by the policy-context indicators, scored on three gender-attentive
# components.
tcs <- read.csv("data/tjet_tcs.csv") %>% # Import truth commissions data
  filter(yearPassed < 2021) # Pre-2021 years have complete data
nrow(tcs) # Total number of truth commissions
summary(tcs$yearPassed)
tcs <- tcs[ # Subset to transitional contexts
  which(tcs$fitsConflictTJ == 1 | # Indicator for post-conflict TJ
          tcs$fitsPostAutocraticTJ == 1), # Indicator for post-autocracy TJ
]
tcs <- tcs %>%
  mutate(
    # 1 when the consulted-groups entry is one of the listed combinations that
    # include women / feminist groups, 0 otherwise
    genderConsulted = ifelse(
      consultedGroups %in% c(
        "ethnic minorities; indigenous groups; women / feminist groups",
        "children's representatives; women / feminist groups",
        "ethnic minorities; women / feminist groups"
      ),
      1, 0
    ),
    # Sum of the three components:
    #   SGBV         - binary for whether TC addressed SGBV
    #   genderReform - binary for whether TC proposed gender attentive reforms
    GAT = genderConsulted + SGBV + genderReform
  )
nrow(tcs) # Total number of truth commissions in transitional contexts
sum(tcs$GAT > 0) # Total number of gender-attentive TCs

# Reparations 

# Reparations policies created before 2021 that are coded as transitional
# justice by the policy-context indicators, with a count of gender-attentive
# components per policy.
reparations <- read.csv("data/tjet_reparations.csv") %>%
  filter(yearCreated < 2021) # Pre-2021 years have complete data
nrow(reparations) # Total number of reparations policies
reparations <- reparations[ # Subset
  which(reparations$fitsConflictTJ == 1 | # Indicator for post-conflict TJ
          reparations$fitsPostAutocraticTJ == 1), # Indicator for post-autocracy TJ
]
nrow(reparations) # Total number of TJ reparations policies
reparations <- reparations %>%
  mutate(
    # Sum of gender-attentive components
    genderAttentive_sum = harmsSexualViolence +
      ifelse(genderCrimes == "yes", 1, 0) +
      ifelse(lgbtqCrimes == "yes", 1, 0)
  )
nrow(reparations) # Total number of reparations policies
sum(reparations$genderAttentive_sum > 0) # Total number of gender-attentive reparations policies


# 4a. Emergence ----

# Trials

# Yearly summaries by trial type: trials started (n), accused persons,
# gender-attentive trials (n_GAT) and the gender-attentive share (GAT).
trials_dom <- trials %>% # Domestic trials
  filter(trialType == "domestic") %>%
  group_by(yearStart) %>%
  summarise(
    trialType = "Domestic Trials",
    n = n(),
    n_accused = sum(n_accused),
    n_GAT = sum(GAT), # has to come before GAT is replaced by its mean
    GAT = mean(GAT)
  )
trials_intl <- trials %>% # International trials (everything recoded as "other")
  filter(trialType == "other") %>%
  group_by(yearStart) %>%
  summarise(
    trialType = "International Trials",
    n = n(),
    n_accused = sum(n_accused),
    n_GAT = sum(GAT),
    GAT = mean(GAT)
  )
trials_graph_data <- rbind(trials_dom, trials_intl) # Combine data

# Line chart of the number of trials started per year, one line per type
ggplot(
  trials_graph_data,
  aes(x = yearStart, y = n, linetype = trialType, colour = trialType)
) +
  geom_line(lwd = 0.75) +
  scale_linetype_manual(values = c(1, 2)) +
  scale_color_manual(values = c("#CC6666", "#9999CC")) +
  labs(x = "Year", y = "Number of Trials") +
  theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    strip.text = element_text(size = 16),
    plot.title = element_text(hjust = 0.5, size = 14),
    text = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 12)
  )
ggsave("trials_over_time_policy-context_coding.jpeg", plot = last_plot(), width = 5, height = 4, units = "in") # Save

# Line chart of the number of gender-attentive trials started per year, one
# line per trial type. The plot previously added geom_line() twice (and
# theme_bw() three times), drawing every line twice on top of itself; a single
# line layer and a single theme call are kept.
trials_graph_data |> # Plot GAT data
  ggplot(aes(x = yearStart, y = n_GAT, lty = trialType, color = trialType)) +
  geom_line(lwd = 0.5) +
  scale_linetype_manual(values = c(1, 2)) +
  scale_color_manual(values = c("#CC6666", "#9999CC")) +
  ylab("Number of Gender-Attentive Trials") + xlab("Year") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 12),
        strip.text = element_text(size = 16),
        plot.title = element_text(hjust = 0.5, size = 14),
        text = element_text(size = 12),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12))
ggsave("gat_trials_over_time_policy-context_coding.jpeg", plot = last_plot(), width = 5, height = 4, units = "in") # Save

# GAT emergence by court type: list the 20 earliest gender-attentive trials of
# each type, and count pre-1994 domestic ones overall and by region.
gat_intl_trials <- trials %>% # First GAT int'l trials
  filter(GAT > 0 & trialType == "other")
gat_intl_trials %>% arrange(yearStart) %>% head(n = 20)
gat_dom_trials <- trials %>% # First GAT domestic trials
  filter(GAT > 0 & trialType == "domestic")
with(gat_dom_trials, length(region_wb[yearStart < 1994]))
with(gat_dom_trials, table(region_wb[yearStart < 1994]))
gat_dom_trials %>% arrange(yearStart) %>% head(n = 20)

# Truth Commissions 

# Early TC cases: print the earliest truth commissions without and with any
# gender-attentive component.
tcs <- tcs %>% left_join(regions, by = "ccode_cow") # Merge in country name to tcs dataset
vars_to_print <- c("ccode_cow", "country", "yearPassed", "GAT") # Select variables to print
nongat_tcs <- tcs[which(tcs$GAT == 0), vars_to_print] # Non-GAT TCs
head(nongat_tcs[order(nongat_tcs$yearPassed), ], n = 20) # Print early non-GAT TCs
gat_tcs <- tcs[which(tcs$GAT > 0), vars_to_print] # GAT TCs
head(gat_tcs[order(gat_tcs$yearPassed), ], n = 15) # Print early GAT TCs

# Graph TCs over time: stacked area chart of the number of active truth
# commissions per year (1970-2020), split by gender-attentiveness.
vars_to_print <- c("ccode_cow", "country", "GAT", "yearBeginOperation", "yearPassed", "yearCompleteOperation")
tcs_graph_data <- tcs[, vars_to_print] # Subset data
# if yearCompleteOperation == NA, TC is ongoing
tcs_graph_data$yearCompleteOperation[is.na(tcs_graph_data$yearCompleteOperation)] <- 2020

# Expand every TC into the years from yearPassed to yearCompleteOperation and
# pool them per group. Map() over the selected rows replaces the former
# `for (i in 1:sum(...))` loops, which iterated over c(1, 0) and failed on
# NA:NA when a group had no rows, and which grew the result with c() on every
# iteration.
gat_tcs_years_active <- as.numeric(unlist(Map( # Years each GAT TC is active
  `:`,
  tcs_graph_data$yearPassed[which(tcs_graph_data$GAT > 0)],
  tcs_graph_data$yearCompleteOperation[which(tcs_graph_data$GAT > 0)]
)))
nongat_tcs_years_active <- as.numeric(unlist(Map( # Years each non-GAT TC is active
  `:`,
  tcs_graph_data$yearPassed[which(tcs_graph_data$GAT == 0)],
  tcs_graph_data$yearCompleteOperation[which(tcs_graph_data$GAT == 0)]
)))

tcs_graph_data <- tibble( # Create base df for all years and treatment groups
  year = rep(1970:2020, 2),
  GAT = rep(c(0, 1), each = length(1970:2020))
)
tcs_graph_data <- tcs_graph_data |> mutate(id = year * 10 + GAT) # Make ID (year and group in one key)
nongat_tcs_years_active <- as.data.frame(table(nongat_tcs_years_active)) # Tabulate and coerce to dataframe
gat_tcs_years_active <- as.data.frame(table(gat_tcs_years_active))
colnames(gat_tcs_years_active) <- colnames(nongat_tcs_years_active) <- c("year", "n_active") # Rename variables
gat_tcs_years_active$GAT <- rep(1, nrow(gat_tcs_years_active)) # Make GAT indicator variable
nongat_tcs_years_active$GAT <- rep(0, nrow(nongat_tcs_years_active))
tcs_years_active <- rbind(gat_tcs_years_active, nongat_tcs_years_active) # Combine
tcs_years_active$year <- as.numeric(as.character(tcs_years_active$year)) # Coerce factor to numeric
tcs_years_active <- tcs_years_active |> mutate(id = year * 10 + GAT) # Make ID
# Merge counts onto the full year-by-group grid; years outside 1970-2020 drop out
tcs_graph_data <- tcs_graph_data |>
  left_join(tcs_years_active[, c("id", "n_active")], by = "id")
tcs_graph_data$n_active[is.na(tcs_graph_data$n_active)] <- 0 # Set NA to 0
tcs_graph_data$label <- "Gender-Attentive" # Create labels
tcs_graph_data$label[tcs_graph_data$GAT == 0] <- "Not Gender-Attentive"
ggplot(tcs_graph_data, aes(x = year, y = n_active, fill = label)) + # Plot
  scale_fill_manual(values = c("darkgrey", "lightgrey")) +
  geom_area(position = "stack") +
  ylab("Number of Active Truth Commissions") + xlab("Year") +
  theme_bw() +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 12),
        strip.text = element_text(size = 16),
        plot.title = element_text(hjust = 0.5, size = 14),
        text = element_text(size = 12),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12))
ggsave("tcs_over_time_policy-context_coding.jpeg", plot = last_plot(), width = 7, height = 4, units = "in")

# Since 2000 majority of TCs are gender-attentive
# (share of active TC-years after 2000 that belong to gender-attentive TCs)
sum(tcs_graph_data$n_active[tcs_graph_data$year > 2000 & tcs_graph_data$GAT == 1]) /
  sum(tcs_graph_data$n_active[tcs_graph_data$year > 2000])

# Reparations

# Early Reparations cases: print the earliest reparations policies without and
# with any gender-attentive component.
reparations <- reparations %>%
  left_join(regions, by = "ccode_cow") # Merge in country name to reparations dataset
vars_to_print <- c("ccode_cow", "country", "yearCreated", "genderAttentive_sum") # Select variables to print
nongat_reparations <- reparations[which(reparations$genderAttentive_sum == 0), vars_to_print]
head(nongat_reparations[order(nongat_reparations$yearCreated), ], n = 20) # Print early non-GAT policies
gat_reparations <- reparations[which(reparations$genderAttentive_sum > 0), vars_to_print]
head(gat_reparations[order(gat_reparations$yearCreated), ], n = 15) # Print early GAT policies

# Bar chart of reparations

# Count GAT policies per creation year. table() returns the years as factor
# levels, so they are mapped back to numbers before joining.
gat_dat <- as.data.frame(table(year = gat_reparations$yearCreated),
                         responseName = "val")
gat_dat$year <- as.numeric(levels(gat_dat$year))[gat_dat$year]

# Put the counts on a complete 1970-2020 grid; creation years outside the grid
# are not carried over, and grid years without a policy are NA for now.
dat1 <- tibble(year = 1970:2020, gat = "Gender-Attentive") |>
  left_join(gat_dat, by = "year")

# Same steps for policies that are not gender-attentive.
nongat_dat <- as.data.frame(table(year = nongat_reparations$yearCreated),
                            responseName = "val")
nongat_dat$year <- as.numeric(levels(nongat_dat$year))[nongat_dat$year]
dat2 <- tibble(year = 1970:2020, gat = "Not Gender-Attentive") |>
  left_join(nongat_dat, by = "year")

# Stack both series and turn the missing counts into zeros.
rep_graph_dat <- rbind(dat1, dat2)
rep_graph_dat$val <- replace(rep_graph_dat$val, is.na(rep_graph_dat$val), 0)

# Stacked bars: new reparations policies per year, filled by GAT status.
ggplot(rep_graph_dat, aes(x = year, y = val, fill = gat)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(values = c("darkgrey", "lightgrey")) +
  labs(x = "Year", y = "Number of New Reparations Policies") +
  theme_bw() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    strip.text = element_text(size = 16),
    plot.title = element_text(hjust = 0.5, size = 14),
    text = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 12)
  )

# Save the plot just built (7 x 4 inches).
ggsave("rep_over_time_policy-context_coding.jpeg",
       plot = last_plot(),
       width = 7, height = 4, units = "in")

# Pre-post 1998 (e.g., year South Africa adopts GAT reparations)
# Counts of reparations policies created up to and including the cut year
# versus after it, split into gender-attentive (genderAttentive_sum > 0) and
# not gender-attentive (genderAttentive_sum == 0).
cut_year <- 1998
with(reparations, sum(genderAttentive_sum[yearCreated <= cut_year] > 0))  # through cut year, GAT
with(reparations, sum(genderAttentive_sum[yearCreated <= cut_year] == 0)) # through cut year, non-GAT
with(reparations, sum(genderAttentive_sum[yearCreated > cut_year] > 0))   # after cut year, GAT
with(reparations, sum(genderAttentive_sum[yearCreated > cut_year] == 0))  # after cut year, non-GAT


# 4b. Content ----

# Trials

# Variation of GAT trials by charges: all GAT trials, those flagged SGBV,
# and the SGBV ones with and without the rape flag
with(trials, sum(GAT == 1))
with(trials, sum(GAT == 1 & SGBV == 1))
with(trials, sum(GAT == 1 & SGBV == 1 & rape == 1))
with(trials, sum(GAT == 1 & SGBV == 1 & rape == 0))

# Year of emergence: distribution of start years for SGBV trials with and
# without the rape flag
with(trials, summary(yearStart[GAT == 1 & SGBV == 1 & rape == 1]))
with(trials, summary(yearStart[GAT == 1 & SGBV == 1 & rape == 0]))

# Variation of GAT trials by victim
with(trials, sum(GAT == 1 & femaleVictim == 1))
with(trials, sum(GAT == 1 & childVictim == 1))
with(trials, sum(GAT == 1 & maleVictim == 1))
with(trials, sum(GAT == 1 & LGBTQvictim == 1))

# All cases with female victim are SGBV: TRUE when every GAT trial flagged
# femaleVictim is also flagged SGBV
with(trials,
     sum(GAT == 1 & SGBV == 1 & femaleVictim == 1) == sum(GAT == 1 & femaleVictim == 1))

# Year of emergence (note median for femaleVictim is less than others)
with(trials, summary(yearStart[GAT == 1 & femaleVictim == 1]))
with(trials, summary(yearStart[GAT == 1 & childVictim == 1]))
with(trials, summary(yearStart[GAT == 1 & maleVictim == 1]))
with(trials, summary(yearStart[GAT == 1 & LGBTQvictim == 1]))

# Charge and victim indicator columns summarised below, and the rows of
# `trials` where GAT is above 0
gat_trial_vars <- c("SGBV", "rape", "sexualViolence",
                    "femaleVictim", "maleVictim", "childVictim", "LGBTQvictim")
gat_trial_rows <- which(trials$GAT > 0)

# Total GAT trials by victim and charges
colSums(trials[gat_trial_rows, c(gat_trial_vars, "GAT")])

# Percent GAT trials by victim and charges: column means over all GAT trials,
# then over the gat_dom_trials and gat_intl_trials data frames
colMeans(trials[gat_trial_rows, c(gat_trial_vars, "GAT")])
colMeans(gat_dom_trials[, gat_trial_vars])
colMeans(gat_intl_trials[, gat_trial_vars])

# Truth Commissions

# Variation in TCs by nature of GAT: all GAT truth commissions, then those
# that also have SGBV, genderConsulted or genderReform above 0
with(tcs, sum(GAT > 0))
with(tcs, sum(GAT > 0 & SGBV > 0))
with(tcs, sum(GAT > 0 & genderConsulted > 0))
with(tcs, sum(GAT > 0 & genderReform > 0))

# TCs by gender reform type: list every TC with genderReform equal to 1,
# ordered by yearPassed
tcs_gender_reform <- tcs[which(tcs$genderReform == 1),
                         c("country", "yearPassed", "genderReform", "genderReformType")]
tcs_gender_reform[order(tcs_gender_reform$yearPassed), ]

# TCs by gender consulted group type: countries of GAT TCs with
# genderConsulted above 0, then of GAT TCs with a non-empty consultedGroups
with(tcs, country[GAT > 0 & genderConsulted > 0])
with(tcs, country[GAT > 0 & consultedGroups != ""])


# Reparations

# Prevalence of attention to sexual violence/harms in GAT reparations policies
sum(reparations$harmsSexualViolence[reparations$genderAttentive_sum > 0])  # Sum of harmsSexualViolence over GAT policies
sum(reparations$genderAttentive_sum > 0)                                   # Number of GAT policies
mean(reparations$harmsSexualViolence[reparations$genderAttentive_sum > 0]) # Mean of harmsSexualViolence over GAT policies
table(reparations$sexualViolenceType[reparations$genderAttentive_sum > 0]) # Sexual violence types among GAT policies

# Summary of other gender attentive reparations variables
vars_list <- c("region_wb", 
               "country",
               "yearCreated",
               "harmsSexualViolence",
               "genderAttentive",
               "genderCrimes",
               "genderWomen",
               "genderMen", 
               "sexualViolenceType",
               "lgbtqCrimes",
               "individualReparations",
               "genderAttentive_sum")

# Inspect all GAT policies. View() opens a data viewer, which is not dependably
# available when the script runs non-interactively (e.g. via Rscript on a
# headless machine), so fall back to printing the table in that case instead
# of risking an aborted replication run.
gat_policy_overview <- subset(reparations[vars_list], genderAttentive_sum > 0)
if (interactive()) {
  View(gat_policy_overview) # View all GAT policies
} else {
  print(gat_policy_overview)
}

# The next lines repeat the GAT count, the harmsSexualViolence total and the
# GAT-only type table computed above, and add the type table for all policies.
sum(reparations$genderAttentive_sum > 0) # Total number of GAT reparations policies
sum(reparations$harmsSexualViolence[reparations$genderAttentive_sum > 0]) # Total number of GAT reparations policies focused on sexual violence
table(reparations$sexualViolenceType) # Sexual violence types across all policies
table(reparations$sexualViolenceType[reparations$genderAttentive_sum > 0]) # Sexual violence types among GAT policies

# Variation in gender attentive reparations policies by group

# Women: number of GAT policies with genderCrimes equal to "yes", then with
# genderWomen equal to "yes"
with(reparations, sum(genderCrimes[genderAttentive_sum > 0] == "yes"))
with(reparations, sum(genderWomen[genderAttentive_sum > 0] == "yes"))

# Men: country and creation year of GAT policies with genderMen equal to "yes"
with(reparations, country[which(genderMen == "yes" & genderAttentive_sum > 0)])
with(reparations, yearCreated[which(genderMen == "yes" & genderAttentive_sum > 0)])

# Child recruitment: total of harmsChildRecruitment over GAT policies, then
# country and creation year of the GAT policies where it equals 1
with(reparations, sum(harmsChildRecruitment[genderAttentive_sum > 0]))
with(reparations, country[which(harmsChildRecruitment == 1 & genderAttentive_sum > 0)])
with(reparations, yearCreated[which(harmsChildRecruitment == 1 & genderAttentive_sum > 0)])

# LGBTQI+: country and creation year of GAT policies with lgbtqCrimes equal
# to "yes"
with(reparations, country[which(lgbtqCrimes == "yes" & genderAttentive_sum > 0)])
with(reparations, yearCreated[which(lgbtqCrimes == "yes" & genderAttentive_sum > 0)])

# Children
# NOTE(review): these two queries filter on lgbtqCrimes, exactly like the
# LGBTQI+ queries directly above, so they print the same result a second time.
# This looks like a copy-paste slip -- confirm which column identifies policies
# addressing children and substitute it here.
with(reparations, country[which(lgbtqCrimes == "yes" & genderAttentive_sum > 0)])
with(reparations, yearCreated[which(lgbtqCrimes == "yes" & genderAttentive_sum > 0)])
